import pandas as pd
import numpy as np
from io import StringIO
#import benford as bf
#import matplotlib.pyplot as plt
from urllib.error import HTTPError
import altair as alt
import yfinance as yf
from os import environ
try:
# for local execution
apiKeyFromFile = open("/Users/kyledunn/fredApiKey.txt", "r").read().strip()
except FileNotFoundError:
apiKeyFromFile = None
pass
# for CI
apiKey = environ.get("FRED_API_KEY", apiKeyFromFile)
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
def getSeries(series="", apiKey=None, description=None):
fetchCommand = "https://api.stlouisfed.org/fred/series/observations?series_id={s}&realtime_end=9999-12-31&api_key={k}&file_type=txt"
resp = urlopen(fetchCommand.format(s=series, k=apiKey))
zipfile = ZipFile(BytesIO(resp.read()))
filesInZip = zipfile.namelist()
data = zipfile.open(filesInZip[1])
if description is None:
description = series
df = pd.read_csv(data, sep="\t", header=None, skiprows=1,
names=["date", description, "rt_start", "rt_end"], na_values=".")
df['date'] = pd.to_datetime(df.date)
return df.set_index("date")
dji = yf.Ticker("^DJI")
# get historical market data
df_dji = dji.history(period="max")
subset = df_dji[df_dji.index <= '1995-12-31']
y = subset.Close.values
x = [n for n, v in enumerate(subset.index.values)]
z = np.polyfit(x, y, 1)
# a * 1 * x^1 + b
trend = np.add(np.multiply(z[1], 1), np.multiply(range(df_dji.shape[0]), z[0]))
alt.data_transformers.disable_max_rows()
(
alt.Chart(pd.DataFrame(index=df_dji.index, data=trend, columns=['fit']).reset_index()).mark_line(color='blue').encode(
alt.X('Date:T', axis=alt.Axis(title='')),
alt.Y('fit:Q', axis=alt.Axis(title=''))
) +\
alt.Chart(df_dji.reset_index()).mark_line(color='black').encode(
alt.X('Date:T', axis=alt.Axis(title='')),
alt.Y('Close:Q', axis=alt.Axis(title='Index Value'))
)
).properties(
title='US Dow Jones Industrial',
width=750,
height=400,
background='white'
)
sp500 = yf.Ticker("^GSPC")
# get historical market data
df_sp500 = sp500.history(period="max")
#df_sp500.head()
init = df_sp500[(df_sp500.index > '1979-12-31')]
subset = init[(init.index <= '1995-12-31')]
y = subset.Close.values
x = range(subset.shape[0])
z = np.polyfit(x, y, 1)
trend = np.add(np.multiply(z[1], 1), np.multiply(range(init.shape[0]), z[0]))
(
alt.Chart(pd.DataFrame(index=init.index, data=trend, columns=['fit']).reset_index()).mark_line(color='blue').encode(
alt.X('Date:T', axis=alt.Axis(title='')),
alt.Y('fit:Q', axis=alt.Axis(title=''))
) +\
alt.Chart(init.reset_index()).mark_line(color='black').encode(
alt.X('Date:T', axis=alt.Axis(title='')),
alt.Y('Close:Q', axis=alt.Axis(title='Index Value'))
)
).properties(
title='US S&P 500',
width=750,
height=400,
background='white'
)
# CPIAUCSL
df_cpi = getSeries("CPIAUCSL", apiKey=apiKey, description="CPI")
returns = (df_sp500['Close'].resample("1M").last() / df_cpi.CPI.resample("1M").last()).dropna().reset_index()
returns.columns = ['Date', 'Adj-close']
alt.Chart(returns.reset_index()).mark_line().encode(
alt.X('Date:T'),
alt.Y('Adj-close:Q')
).properties(
title='CPI Adjusted SP500',
width=700,
height=450
)
# Compute CAGR for each period
totals = []
for i in range(0, int(returns.shape[0]-(40*12)-1)):
totals.append(dict({
"start": returns.iloc[i, 0],
"return": ((returns.iloc[(40*12)+(i), 1] / returns.iloc[i, 1])**(1/40) - 1) * 100
}))
df_totals = pd.DataFrame.from_dict(totals)
alt.Chart(df_totals).mark_line().encode(
alt.X('start:T', axis=alt.Axis(title='')),
alt.Y('return:Q', axis=alt.Axis(title='Total 40yr Return [CAGR]'))
).properties(
title='Inflation Adjusted S&P 500 Returns (CAGR) vs Start Date of Investment',
width=700,
height=450
)
hist = alt.Chart(df_totals).transform_joinaggregate(
total='count(*)'
).transform_calculate(
pct='1 / datum.total',
decade='toString(floor(year(datum.start) / 10)) + "0\'s"'
).mark_bar(opacity=0.6).encode(
alt.X("return:Q", bin=True, axis=alt.Axis(title='40yr CAGR Return [%]')),
alt.Y('sum(pct):Q', axis=alt.Axis(title='Likelihood')),
#alt.Row("decade:N")
).properties(
title='Distribution of Inflation Adjusted S&P 500 Returns (CAGR)',
width=700,
height=450
)
cumu = alt.Chart(df_totals).mark_line(color='black', interpolate='step-after').transform_joinaggregate(
total='count(*)'
).transform_calculate(
pct='1 / datum.total'
).transform_window(
frame=[None, 0],
sort=[{"field": "return"}],
cumu='sum(pct)'
).encode(
alt.X("return:Q"),
alt.Y('cumu:Q', axis=alt.Axis(title='Cumulative Likelihood'))
).properties(
title='Distribution of Inflation Adjusted S&P 500 Returns (CAGR)',
width=700,
height=450,
)
(hist + cumu).resolve_scale(y='independent').properties(background='white')
histf = alt.Chart(df_totals).transform_joinaggregate(
total='count(*)'
).transform_calculate(
pct='1 / datum.total',
decade='toString(floor(year(datum.start) / 10)) + "0\'s"'
).mark_bar(opacity=0.6).encode(
alt.X("return:Q", bin=True, axis=alt.Axis(title='40yr CAGR Return [%]')),
alt.Y('sum(pct):Q', axis=alt.Axis(title='Likelihood')),
alt.Color("decade:N"),
alt.Row("decade:N")
).properties(
title='Distribution of Inflation Adjusted S&P 500 Returns (CAGR)',
width=700,
height=450
)
histf.display()
df_spex = pd.read_html("http://www.proshares.com/funds/spxt_daily_holdings.html")[0]
#df_spex.head()
%%time
urlFor = lambda t: "https://stockrow.com/api/companies/{}/financials.xlsx?dimension=Q§ion=Balance%20Sheet&sort=desc".format(t)
def dfFor(t):
try:
tmp = pd.read_excel(urlFor(t))
tmp.columns = list(map(lambda v: v if isinstance(v, str) else "{}-{}-{}".format(v.year, v.month, v.day), tmp.columns.to_list()))
tmp = tmp.dropna().set_index("Unnamed: 0").T
tmp['Ticker'] = [t]*tmp.shape[0]
return tmp
except HTTPError:
print("{} failed".format(t))
return None
map_sp500 = map(dfFor, df_spex['Ticker Symbol'].values)
all_df = pd.concat(map_sp500)
#all_df.head()
all_df['dt'] = pd.to_datetime(all_df.index, format="%Y-%m-%d")
#all_df['dt'].head()
#all_df.dtypes
alt.data_transformers.disable_max_rows()
def violinFor(df, metric='Total Debt'):
return alt.Chart(df[['dt', metric]].dropna()).transform_calculate(
yr='year(datum.dt)'
).transform_density(
metric,
as_=['{}'.format(metric.replace(" ", "_")), 'density'],
extent=[df[metric].quantile(0.1), df[metric].quantile(0.9)],
maxsteps=1000,
groupby=['yr']
).mark_area(orient='horizontal').encode(
alt.Y('{}:Q'.format(metric.replace(" ", "_"))),
alt.Color('yr:N', title='Year'),
x=alt.X(
'density:Q',
stack='center',
impute=None,
title=None,
axis=alt.Axis(labels=False, values=[0],grid=False, ticks=True),
),
column=alt.Column(
'yr:N',
header=alt.Header(
titleOrient='bottom',
labelOrient='bottom',
labelPadding=0,
),
)
).properties(
title="Changing {} Profile of S&P 500 (ex tech)".format(metric),
width=100
)
violinFor(all_df)
violinFor(all_df, metric='Treasury Stock')
charts = []
for c in all_df.columns:
if c in ['dt', 'Ticker']:
continue
charts.append(violinFor(all_df, metric=c))
#alt.vconcat(*charts)
def chartFor(df, metric='Long Term Debt (Total)'):
summed = df.copy().reset_index().groupby("dt").sum().resample('1Q').mean()
#yoy = summed.sort_index().dropna().pct_change(12).dropna().apply(lambda v: v * 100)
return alt.Chart(summed.sort_index().reset_index()[:-1]).mark_line().encode(
alt.X("dt:T", axis=alt.Axis(title='')),
alt.Y("{}:Q".format(metric), axis=alt.Axis(title="{} [USD]".format(metric))),
tooltip=[alt.Tooltip("dt:T", format="%b %Y"), alt.Tooltip("{}:Q".format(metric))]
).properties(
title="{} trend for S&P 500 (Ex Technology)".format(metric),
width=700,
height=450
)
chartFor(all_df)
lcharts = []
for c in all_df.columns:
if c in ['dt', 'Ticker']:
continue
lcharts.append(chartFor(all_df.dropna(how='all'), metric=c))
alt.vconcat(*lcharts)
def yoyChartFor(df, metric='Long Term Debt (Total)'):
summed = df.copy().reset_index().groupby("dt").sum().resample('1Q').mean()
yoy = summed.sort_index().dropna().pct_change(4).dropna().apply(lambda v: v * 100)
return alt.Chart(yoy.sort_index().reset_index()[:-1]).mark_bar().encode(
alt.X("dt:T", axis=alt.Axis(title='')),
alt.Y("{}:Q".format(metric), axis=alt.Axis(title="{} Growth [year-over-year %]".format(metric))),
tooltip=[alt.Tooltip("dt:T", format="%b %Y"), alt.Tooltip("{}:Q".format(metric))]
).properties(
title="{} growth for S&P 500 (Ex Technology)".format(metric),
width=700,
height=450
)
yoyChartFor(all_df)
ycharts = []
for c in all_df.columns:
if c in ['dt', 'Ticker']:
continue
ycharts.append(yoyChartFor(all_df.dropna(how='all'), metric=c))
alt.vconcat(*ycharts)
yoyChartFor(all_df, 'Pension and Post-Retirement Liabilities')
yoyChartFor(all_df, 'Liabilities (Preferred Stock)')
yoyChartFor(all_df, 'Shares (Common)')
chartFor(all_df, 'Shares (Common)')
chartFor(all_df, 'Goodwill and Intangible Assets (Total)')
chartFor(all_df, 'Dividends Payable')
chartFor(all_df, 'Property, Plant, Equpment (Net)')
chartFor(all_df, 'Receivables')
chartFor(all_df, 'Inventory')
chartFor(all_df, 'Receivables') + chartFor(all_df, 'Accounts Payable')
chartFor(all_df, 'Total Assets') + chartFor(all_df, 'Total liabilities')
chartFor(all_df, 'Treasury Stock')
chartFor(all_df, 'Cash and Short Term Investments')
Exported from analysis/sp500-returns-and-performance.ipynb committed by GitHub Action on Sat Feb 29 00:08:42 2020 revision 2, 9870fd8